---
title: "U.S. National Parks Visits (1904-2016)"
description: "Tidy Tuesday: Week 38, 2019"
author:
- name: Mickey Rafa
url: https://mrafa3.github.io/
date: 12-30-2022
categories: [R, "#TidyTuesday", bump-chart, gt, web-scraping] # self-defined categories
image: plot_viz_538.png
draft: false # setting this to `true` will prevent your post from appearing on your listing page until you're ready!
format:
html:
toc: true
toc-depth: 5
code-link: true
code-fold: true
code-tools: true
code-summary: "Show code"
self-contained: true
editor_options:
chunk_output_type: inline
execute:
error: false
message: false
warning: false
eval: true
---
# 1. Load Packages & Setup
```{r setup, include=TRUE}
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
tidyverse,
tidytuesdayR,
dlookr,
ggtext,
gt,
gtExtras, #for font awesome icons in gt tables
ggbump,
showtext,
janitor, #for clean_names()
scales,
htmltools, #for tagList()
glue,
here,
geomtextpath
)
font_add('fa-brands', 'fonts/Font Awesome 6 Brands-Regular-400.otf')
sysfonts::font_add_google("Lato","lato")
showtext::showtext_auto()
showtext::showtext_opts(dpi=300)
```
# 2. Read in the Data
```{r read_data, include=TRUE}
tuesdata <- tidytuesdayR::tt_load(2019, week = 38)
df <- tuesdata$national_parks
```
# 3. Examine the Data
```{r glimpse, include=TRUE}
df %>%
glimpse()
```
# 4. Tidy the Data
```{r df_annual}
df_annual <- df %>%
filter(year != 'Total',
unit_type == 'National Park') %>%
mutate(year = as.numeric(year)) %>%
mutate(decade = as.factor(year - (year %% 10))) %>%
arrange(year) %>%
group_by(year) %>%
mutate(annual_visitor_rank = as.integer(rank(-visitors))) %>%
select(annual_visitor_rank, year, unit_name, visitors, everything()) %>%
arrange(year, annual_visitor_rank) %>%
ungroup() %>%
separate(col=unit_name, into = c("parkname_full", "parktype"), sep = "National Park",
remove=FALSE)
```
```{r highlight_parks, include=TRUE}
highlight_list_annual <- df_annual %>%
filter(year ==2016,
annual_visitor_rank <=5) %>%
pull(unit_name)
```
```{r df_decade, include=TRUE}
df_decade <- df %>%
filter(unit_type == 'National Park',
year != 'Total') %>%
mutate(year = as.numeric(year)) %>%
mutate(decade = year - (year %% 10)) %>%
group_by(decade, unit_name, .groups=TRUE) %>%
summarise(visitors_by_decade = sum(visitors, na.rm = TRUE)) %>%
ungroup() %>%
group_by(decade) %>%
mutate(rank_visitors_by_decade = as.integer(rank(-visitors_by_decade))) %>%
ungroup() %>%
separate(col=unit_name, into = c("parkname_full", "parktype"), sep = "National Park",
remove=FALSE)
top_1900s <- df_decade %>% filter(decade == 1900) %>% arrange(rank_visitors_by_decade) %>% head(5) %>% pull(unit_name)
top_2010s <- df_decade %>% filter(decade == 2010) %>% arrange(rank_visitors_by_decade) %>% head(5) %>% pull(unit_name)
```
# 5. Visualization Parameters
```{r my_theme, include=TRUE}
my_theme <- theme(
text = element_text(family = 'lato'),
plot.title = element_textbox_simple(color="black", face="bold", size=20, hjust=0),
plot.subtitle = element_textbox_simple(color="black", size=12, hjust=0),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
plot.caption = element_textbox_simple(color="black", size=12),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.title=element_blank(),
legend.text = element_text(color="black", size=12, hjust=0),
legend.position = 'top',
strip.text = element_text(color="black", size=14))
title <- tagList(p('Ranking of popularity of U.S. National Parks'))
subtitle <- tagList(span('*by the number of visitors annually*'))
caption <- paste0("<span style='font-family:lato;'>**Source**: TidyTuesday Week 38 (2019)</span><br>",
"<span style='font-family:fa-brands;'></span>",
"<span style='font-family:lato;'>@mickey.rafa</span>",
"<span style='font-family:lato;color:white;'>....</span>",
"<span style='font-family:fa-brands;'></span>",
"<span style='font-family:lato;color:white;'>.</span>",
"<span style='font-family:lato;'>mrafa3</span>")
color_palette <- viridisLite::mako(8)
description_color <- 'grey40'
subtitle_3 <- tagList(span('*by the number of visitors by decade*'))
```
# 6. Plot
```{r plot_viz_538, include=TRUE, fig.align='center'}
(plot_viz_538 <- df_annual %>%
ggplot(.,
aes(x=year,
y=-annual_visitor_rank,
group=unit_name,
color=unit_name)) +
geom_line(color='gray80') +
geom_line(data=. %>% filter(unit_name %in% highlight_list_annual)) +
ggrepel::geom_text_repel(
data = df_annual %>% filter(year == 2016, unit_name %in% highlight_list_annual),
aes(label = paste("#", annual_visitor_rank, parkname_full)),
nudge_x = 15,
size = 3,
direction = 'y',
fontface = 'bold'
) +
labs(x='',
title = title,
subtitle = subtitle,
caption = caption) +
coord_cartesian(xlim = c(1900, 2040), ylim = c(-65, 2), expand = F) +
my_theme +
theme(legend.position = 'none'))
```
```{r viz_decade_bump, include=TRUE, fig.align='center'}
(plot_viz_decade_bump <- df_decade %>%
filter(unit_name %in% c(top_1900s, top_2010s),
decade >= 1900) %>%
ggplot(.,
aes(x=decade,
y=-rank_visitors_by_decade,
col=unit_name)) +
geom_point(shape = '|', stroke = 6) +
geom_bump(linewidth = 1) +
ggrepel::geom_text_repel(
data = df_decade %>% filter(decade == 1900, unit_name %in% top_1900s),
aes(label = paste('#',rank_visitors_by_decade, " ", parkname_full, sep = "")),
#nudge_x = -1,
hjust = 1,
size = 4,
direction = "y",
fontface = 'bold'
) +
ggrepel::geom_text_repel(
data = df_decade %>% filter(decade == 2010, unit_name %in% top_2010s),
aes(label = paste('#',rank_visitors_by_decade, " ", parkname_full, sep = "")),
hjust = 0,
nudge_x = 1,
size = 4,
direction = "y",
fontface = 'bold'
) +
geom_text(
data = df_decade %>% filter(decade == 2010, unit_name %in% c('Hot Springs National Park', 'Wind Cave National Park', 'Crater Lake National Park')),
aes(label = paste('#',rank_visitors_by_decade, " ", parkname_full, sep = "")),
hjust = 0,
nudge_x = 1,
size = 4,
fontface = 'bold'
) +
annotate(
'text',
x = c(1898, 2012),
y = c(5, 5),
label = c('1900s', '2010s'),
hjust = c(0, 1),
vjust = 1,
size = 6,
fontface = 'bold') +
coord_cartesian(xlim = c(1860, 2070), ylim = c(-45, 10), expand = F) +
#theme_void() +
my_theme +
theme(legend.position = 'none',
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
#plot.background = element_rect(fill = color_palette[8], color = NA),
text = element_text(
color = description_color
)
) +
labs(
title = title,
subtitle = subtitle_3,
caption = caption
))
```
# 2010 to present
```{r}
(df_recent_all_types <- df %>%
filter (year != 'Total' ) %>%
mutate (year = as.numeric (year)) %>%
filter (year >= 2010 ,
! unit_type %in% c ('Parkway' , 'National Parkway' )) %>%
group_by (year) %>%
mutate (rank_annual_visitors = as.integer (rank (- visitors))) %>%
arrange (year, rank_annual_visitors))
```
```{r}
(highlight_df_recent_all_types <- df_recent_all_types %>%
ungroup () %>%
filter (rank_annual_visitors <= 6 ) %>%
distinct (unit_name) %>%
pull ())
```
```{r}
df_recent_all_types %>%
filter (rank_annual_visitors <= 6 ) %>%
ggplot (.,
aes (x= year,
y= rank_annual_visitors,
col= unit_name)) +
geom_point (shape = '|' , stroke = 6 ) +
geom_bump (size = 1 ) +
geom_text (
data = df_recent_all_types %>% filter (year == 2010 , unit_name %in% highlight_df_recent_all_types),
#should string manip the labels to cut out National Park from name
aes (label = unit_name),
size = 3 ,
hjust = 1 ,
nudge_x = - 0.1 ,
fontface = 'bold'
) +
geom_text (
data = df_recent_all_types %>% filter (year == 2016 , unit_name %in% highlight_df_recent_all_types),
aes (label = rank_annual_visitors),
hjust = 0 ,
nudge_x = 1 ,
size = 5 ,
fontface = 'bold'
) +
annotate (
'text' ,
x = c (2010 , 2016 ),
y = c (0.25 , 0.25 ),
label = c ('2010' , '2016' ),
hjust = c (0 , 1 ),
vjust = 1 ,
size = 4 ,
fontface = 'bold' ,
color = description_color) +
scale_y_reverse (position = 'right' , breaks = seq (20 , 2 , - 2 )) +
#scale_color_manual(values = country_colors) +
coord_cartesian (xlim = c (2005 , 2020 ), ylim = c (12 , 0 ), expand = F) +
#theme_void() +
my_theme +
theme (legend.position = 'none' ,
panel.grid.major.x = element_blank (),
panel.grid.minor.x = element_blank (),
#plot.background = element_rect(fill = color_palette[8], color = NA),
text = element_text (
color = description_color
)
) +
labs (
title = 'Ranking the popularity of U.S. National Parks' ,
subtitle = 'by total visitors each decade'
)
```
# Scraping Deferred Maintainance
```{r read_maintenance_url, include=TRUE}
url <- 'https://www.nps.gov/common/uploads/sortable_dataset/infrastructure/9143F3CC-B3B8-3F78-2F70BE310E528B37/nri-DMParkFactSheettoConverttoaCSV-Copy.csv?t=1662071341782'
df_maintenance <- read_csv(url) %>%
clean_names() %>%
mutate(deferred_maintenance = as.numeric(gsub('[$,]', '', dm_r))) %>%
select(park_name, deferred_maintenance)
```
```{r scatter_maintenance_visitors, include=FALSE}
df_recent_all_types %>%
filter(year == 2016) %>%
left_join(x=.,
y=df_maintenance,
by=c('unit_name' = 'park_name')) %>%
ggplot(.,
aes(x=visitors,
y=deferred_maintenance)) +
geom_point() +
#geom_smooth(stat = 'lm') +
#geom_text(data=. %>% filter(integer(rank(deferred_maintenance)) <= 5),
# aes(label=unit_name)) +
my_theme
```
## Table
```{r}
df_tbl <- df_annual %>%
group_by (unit_name) %>%
mutate (year_opened = min (year)) %>%
ungroup () %>%
select (year, unit_name, visitors, state, year_opened) %>%
filter (year %in% c (2006 , 2016 )) %>%
spread (year, visitors) %>%
mutate (percent_change_2006_2016 = (` 2016 ` - ` 2006 ` )/ ` 2006 ` ,
rank_visitors_2006 = as.integer (rank (- ` 2006 ` )),
rank_visitors_2016 = as.integer (rank (- ` 2016 ` )),
rank_change_2016 = rank_visitors_2006 - rank_visitors_2016) %>%
select (unit_name, state, year_opened, rank_visitors_2016, visitors= ` 2016 ` ,
percent_change_2006_2016, rank_change_2016) %>%
left_join (x= .,
y= df_maintenance,
by= c ('unit_name' = 'park_name' )) %>%
ungroup () %>%
mutate (deferred_maintenance = round (deferred_maintenance / 1000000 , 0 )) %>%
arrange (- visitors) %>%
slice (1 : 15 )
```
```{r min_max_palette, include=TRUE}
min_visitors <- df_tbl$visitors %>% min()
max_visitors <- df_tbl$visitors %>% max()
visitors_palette <- col_numeric(c("#e5f5e0", "#31a354"),
domain = c(min_visitors, max_visitors),
alpha = .75)
max_deferred <- df_tbl$deferred_maintenance %>% max()
min_deferred <- df_tbl$deferred_maintenance %>% min()
deferred_palette <- col_numeric(c("#fee0d2", "#de2d26"),
domain = c(min_deferred, max_deferred),
alpha = .75)
```
```{r tbl_viz, include=TRUE}
(tbl_viz <- df_tbl %>%
gt() %>%
#rename columns
cols_label(rank_visitors_2016 = 'Rank Visitors',
unit_name = 'Park',
visitors = 'Visitors',
percent_change_2006_2016 = '% Change',
rank_change_2016 = 'Rank Change',
state = 'State',
year_opened = 'Year Opened',
deferred_maintenance = 'Deferred Maintenance & Repairs Est. (2021, Millions)') %>%
#format numeric columns
fmt_number(columns = c(visitors),
sep_mark = ",",
decimals = 0) %>%
fmt_percent(columns = c(percent_change_2006_2016),
decimals = 0) %>%
fmt_currency(columns = c(deferred_maintenance),
decimals = 0) %>%
#add tab spanners
tab_spanner(
label = md('**2006 - 2016**'),
columns = c(percent_change_2006_2016, rank_change_2016)
) %>%
tab_spanner(
label = md('**2016**'),
columns = c(rank_visitors_2016, visitors)
) %>%
#add table title
tab_header(title = md("**Most Popular U.S. National Parks in 2016**"),
subtitle = 'by estimated number of visitors') %>%
tab_source_note(source_note = md("Visitor data sourced from Tidy Tuesday Week 38 (2019)<br>Deffered Maintenance & Repairs estimates sourced from nps.gov")) %>%
#apply new style to all column headers
tab_style(
locations = cells_column_labels(columns = everything()),
style = list(
#thick border
cell_borders(sides = "bottom", weight = px(3)),
#make text bold
cell_text(weight = "bold")
)
) %>%
#apply different style to title
tab_style(locations = cells_title(groups = "title"),
style = list(
cell_text(weight = "bold", size = 24)
)) %>%
data_color(columns = c(visitors),
colors = visitors_palette) %>%
data_color(columns = c(deferred_maintenance),
colors = deferred_palette) %>%
opt_all_caps() %>%
opt_table_font(
font = list(
google_font("Chivo"),
default_fonts()
)
) %>%
tab_options(
#remove border between column headers and title
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
#remove border around the table
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
#adjust font sizes and alignment
source_notes.font.size = 12,
heading.align = "left"
))
```
# 7. Save
# 8. Session Info
::: {.callout-tip collapse="true"}
##### Expand for Session Info
```{r, echo = FALSE}
#| eval: true
#| warning: false
sessionInfo()
```
:::
# 9. Github Repository
::: {.callout-tip collapse="true"}
##### Expand for GitHub Repo
[ Access the GitHub repository here ](https://github.com/mrafa3/mrafa3.github.io)
:::